import math,random
import quandl
import numpy as np
import pandas as pd
from sklearn import preprocessing
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression,SGDRegressor,BayesianRidge,ARDRegression,PassiveAggressiveRegressor,TheilSenRegressor
from sklearn.svm import SVR
from sklearn.ensemble import GradientBoostingRegressor,RandomForestRegressor,StackingRegressor,VotingRegressor
from sklearn.neural_network import MLPRegressor
import plotly.express as px
import warnings
# Suppress every warning for the rest of the process.
# NOTE(review): this also hides pandas/scikit-learn deprecation and
# chained-assignment warnings raised by the code below — consider narrowing it.
warnings.filterwarnings('ignore')
# Ticker symbol; getStockData appends it to the 'WIKI/' dataset prefix.
stock = "MSFT"
# Number of trailing rows PreprocessData holds out as the forecast window
# (presumably ~one year of trading days — confirm).
daysToForecast = 251
def getStockData(stock):
    """Download the full price history for *stock* from Quandl's WIKI dataset.

    The API key is read from the ``QUANDL_API_KEY`` environment variable when
    it is set, so credentials can be supplied without editing the source.

    Args:
        stock: Ticker symbol appended to the ``WIKI/`` dataset prefix.

    Returns:
        Whatever ``quandl.get`` returns for ``WIKI/<stock>`` (the rest of this
        script treats it as a DataFrame with ``Adj. *`` columns).
    """
    import os

    # NOTE(review): the literal below is a credential committed to source
    # control. It is kept only as a fallback so existing runs keep working;
    # rotate it and rely on QUANDL_API_KEY instead.
    quandl.ApiConfig.api_key = os.environ.get(
        "QUANDL_API_KEY", "qWcicxSctVxrP9PhyneG"
    )
    return quandl.get(f"WIKI/{stock}")
def FormatDataForModel(dataArray):
    """Derive the model's feature columns from raw adjusted price data.

    Args:
        dataArray: DataFrame containing at least the columns ``Adj. Open``,
            ``Adj. High``, ``Adj. Low``, ``Adj. Close`` and ``Adj. Volume``.
            It is not modified.

    Returns:
        A new DataFrame with the columns ``Adj. Close``, ``HL_PCT``,
        ``PCT_change`` and ``Adj. Volume`` (in that order), with every missing
        value replaced by ``-99999``.

    Raises:
        KeyError: If any of the required columns is absent.
    """
    # Take an explicit copy: the derived columns below must be written to an
    # independent frame, not to a selection of the caller's DataFrame.
    prices = dataArray[
        ['Adj. Open', 'Adj. High', 'Adj. Low', 'Adj. Close', 'Adj. Volume']
    ].copy()

    # Distance of the high above the close, as a percentage of the close.
    prices['HL_PCT'] = (
        (prices['Adj. High'] - prices['Adj. Close']) / prices['Adj. Close'] * 100.0
    )
    # Open-to-close move, as a percentage of the open.
    prices['PCT_change'] = (
        (prices['Adj. Close'] - prices['Adj. Open']) / prices['Adj. Open'] * 100.0
    )

    features = prices[['Adj. Close', 'HL_PCT', 'PCT_change', 'Adj. Volume']]
    # Non-inplace fillna returns a fresh frame, so the result owns its data.
    # -99999 is a sentinel for missing values rather than dropping the rows.
    return features.fillna(-99999)
def PreprocessData(mlData, daysToForecast):
    """Split feature data into a scaled training set and a forecast window.

    A ``label`` column is added to *mlData* in place: the ``Adj. Close`` value
    ``ceil(0.12 * daysToForecast)`` rows later. All feature columns are then
    standardised together, and the last *daysToForecast* rows are held out as
    the window to forecast.

    Args:
        mlData: Feature DataFrame containing an ``Adj. Close`` column. It is
            modified: a ``label`` column is added.
        daysToForecast: Number of trailing rows to hold out. Must be at least
            1 and smaller than the number of rows in *mlData*.

    Returns:
        A list ``[X, y, X_data, forecastData]`` where ``X`` is the scaled
        feature array for the training rows, ``y`` their labels, ``X_data``
        the scaled feature array for the held-out rows, and ``forecastData``
        the held-out slice of *mlData* (including its ``label`` column).

    Raises:
        ValueError: If *daysToForecast* is below 1 or leaves no training rows.
    """
    # Non-positive values would silently swap or empty the train/forecast
    # slices below (e.g. X[-0:] is the whole array), so reject them up front.
    if daysToForecast < 1:
        raise ValueError("daysToForecast must be at least 1")
    if daysToForecast >= len(mlData):
        raise ValueError(
            "daysToForecast must be smaller than the number of rows in mlData"
        )

    forecast_col = 'Adj. Close'
    forecast_out = int(math.ceil(0.12 * daysToForecast))
    mlData['label'] = mlData[forecast_col].shift(-forecast_out)
    # The shift leaves NaN labels on the last forecast_out rows. Those rows are
    # not dropped: forecast_out <= daysToForecast, so they all fall inside the
    # held-out window and never reach y.

    # Keyword form: passing the axis positionally was removed in pandas 2.0.
    X = np.array(mlData.drop(columns=['label']))
    # NOTE(review): scaling uses statistics from all rows, including the
    # held-out window — confirm this is intended.
    X = preprocessing.scale(X)

    X_data = X[-daysToForecast:]
    X = X[:-daysToForecast]
    forecastData = mlData[-daysToForecast:]
    trainData = mlData[:-daysToForecast]
    y = np.array(trainData['label'])

    return [X, y, X_data, forecastData]
def TrainAndPredict(model, X, y, X_data, test_size=0.2, random_state=None):
    """Fit *model* on a random train split, score it, and forecast *X_data*.

    Args:
        model: Estimator exposing ``fit``, ``score`` and ``predict``.
        X: Feature array for the labelled rows.
        y: Labels aligned with *X*.
        X_data: Feature array to generate predictions for.
        test_size: Share of the labelled rows held out for scoring. Defaults
            to 0.2.
        random_state: Seed forwarded to ``train_test_split``. The default
            ``None`` gives a different split on every call; pass an int for
            reproducible scores.

    Returns:
        A tuple ``(accuracy, prediction)``: the value of ``model.score`` on
        the held-out split (R^2 for scikit-learn regressors such as the
        ``LinearRegression`` this script uses) and ``model.predict(X_data)``.
    """
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=test_size, random_state=random_state
    )
    model.fit(X_train, y_train)
    accuracy = model.score(X_test, y_test)
    prediction = model.predict(X_data)
    return accuracy, prediction
def addPredictionToForecast(prediction, forecastData):
    """Pair the held-out closing prices with the model's predictions.

    Args:
        prediction: Sliceable sequence of predicted values, one per row of
            *forecastData*.
        forecastData: DataFrame containing an ``Adj. Close`` column.

    Returns:
        A new DataFrame sharing *forecastData*'s index, with two columns:
        ``EOD`` (the original ``Adj. Close``) and ``prediction``.
    """
    # Keep only the close, under the name used on the charts.
    paired = forecastData[['Adj. Close']].rename(columns={'Adj. Close': 'EOD'})
    paired['prediction'] = prediction[:]
    return paired
def GraphPredictions(forecastData, stock):
    """Show a line chart of *forecastData*, titled with *stock*.

    Args:
        forecastData: Data passed straight to ``plotly.express.line``.
        stock: Text used as the chart title.
    """
    axis_titles = {'xaxis_title': 'Time', 'yaxis_title': 'Price'}
    chart = px.line(forecastData)
    chart.update_layout(title=stock, **axis_titles)
    chart.show()
def GraphAllData(allData, forecastData, stock):
    """Show the full closing-price history alongside the predictions.

    Args:
        allData: DataFrame containing an ``Adj. Close`` column.
        forecastData: DataFrame containing a ``prediction`` column.
        stock: Text used as the chart title.
    """
    closes = allData['Adj. Close']
    predicted = forecastData['prediction']
    # Column-wise concat aligns the two series on their indexes.
    combined = pd.concat([closes, predicted], axis=1, sort=False)

    axis_titles = {'xaxis_title': 'Time', 'yaxis_title': 'Price'}
    chart = px.line(combined)
    chart.update_layout(title=stock, **axis_titles)
    chart.show()
# Download the history once: every horizon works from the same raw data.
# FormatDataForModel builds its features on a column selection and the graph
# helpers only read allData, so the download can be reused across runs.
allData = getStockData(stock)

# The first horizon is the module-level daysToForecast; the other two are
# three and five times 251 rows.
forecastHorizons = (daysToForecast, 251 * 3, 251 * 5)

for daysToForecast in forecastHorizons:
    # Rebuild the features every pass: PreprocessData adds a 'label' column to
    # the frame it receives, which must not leak into the next run's features.
    mlData = FormatDataForModel(allData)
    X, y, X_data, forecastData = PreprocessData(mlData, daysToForecast)

    model = LinearRegression()
    accuracy, prediction = TrainAndPredict(model, X, y, X_data)
    forecastData = addPredictionToForecast(prediction, forecastData)

    print(accuracy)
    GraphPredictions(forecastData, stock)
    GraphAllData(allData, forecastData, stock)